Package Bio :: Package Restriction :: Module Restriction
[hide private]
[frames] | no frames]

Source Code for Module Bio.Restriction.Restriction

   1  #!/usr/bin/env python 
   2  # 
   3  #      Restriction Analysis Libraries. 
   4  #      Copyright (C) 2004. Frederic Sohm. 
   5  # 
   6  # This code is part of the Biopython distribution and governed by its 
   7  # license.  Please see the LICENSE file that should have been included 
   8  # as part of this package. 
   9  # 
  10   
  11  """Restriction Enzyme classes. 
  12   
  13  Notes about the various classes of the restriction enzyme implementation:: 
  14   
  15              RestrictionType is the type of all restriction enzymes. 
  16          ---------------------------------------------------------------------------- 
  17              AbstractCut implements some methods that are common to all enzymes. 
  18          ---------------------------------------------------------------------------- 
  19              NoCut, OneCut,TwoCuts   represent the number of double strand cuts 
  20                                      produced by the enzyme. 
  21                                      they correspond to the 4th field of the rebase 
  22                                      record emboss_e.NNN. 
  23                      0->NoCut    : the enzyme is not characterised. 
  24                      2->OneCut   : the enzyme produce one double strand cut. 
  25                      4->TwoCuts  : two double strand cuts. 
  26          ---------------------------------------------------------------------------- 
  27              Meth_Dep, Meth_Undep    represent the methylation susceptibility to 
  28                                      the enzyme. 
  29                                      Not implemented yet. 
  30          ---------------------------------------------------------------------------- 
  31              Palindromic,            if the site is palindromic or not. 
  32              NonPalindromic          allow some optimisations of the code. 
  33                                      No need to check the reverse strand 
  34                                      with palindromic sites. 
  35          ---------------------------------------------------------------------------- 
  36              Unknown, Blunt,         represent the overhang. 
  37              Ov5, Ov3                Unknown is here for symmetry reasons and 
  38                                      corresponds to enzymes that are not characterised 
  39                                      in rebase. 
  40          ---------------------------------------------------------------------------- 
  41              Defined, Ambiguous,     represent the sequence of the overhang. 
  42              NotDefined 
  43                                      NotDefined is for enzymes not characterised in 
  44                                      rebase. 
  45   
  46                                      Defined correspond to enzymes that display a 
  47                                      constant overhang whatever the sequence. 
  48                                      ex : EcoRI. G^AATTC -> overhang :AATT 
  49                                                  CTTAA^G 
  50   
  51                                      Ambiguous : the overhang varies with the 
  52                                      sequence restricted. 
  53                                      Typically enzymes which cut outside their 
  54                                      restriction site or (but not always) 
  55                                      inside an ambiguous site. 
  56                                      ex: 
  57                                      AcuI CTGAAG(22/20)  -> overhang : NN 
  58                                      AasI GACNNN^NNNGTC  -> overhang : NN 
  59                                           CTGN^NNNNNCAG 
  60   
  61                  note : these 3 classes refers to the overhang not the site. 
  62                     So the enzyme ApoI (RAATTY) is defined even if its restriction 
  63                     site is ambiguous. 
  64   
  65                          ApoI R^AATTY -> overhang : AATT -> Defined 
  66                               YTTAA^R 
  67                     Accordingly, blunt enzymes are always Defined even 
  68                     when they cut outside their restriction site. 
  69          ---------------------------------------------------------------------------- 
  70              Not_available,          as found in rebase file emboss_r.NNN files. 
  71              Commercially_available 
  72                                      allow the selection of the enzymes according to 
  73                                      their suppliers to reduce the quantity 
  74                                      of results. 
  75                                      Also will allow the implementation of buffer 
  76                                      compatibility tables. Not implemented yet. 
  77   
  78                                      the list of suppliers is extracted from 
  79                                      emboss_s.NNN 
  80          ---------------------------------------------------------------------------- 
  81  """ 
  82   
  83  from __future__ import print_function 
  84  from Bio._py3k import zip 
  85  from Bio._py3k import filter 
  86  from Bio._py3k import range 
  87   
  88  import re 
  89  import itertools 
  90   
  91  from Bio.Seq import Seq, MutableSeq 
  92  from Bio.Alphabet import IUPAC 
  93   
  94  from Bio.Restriction.Restriction_Dictionary import rest_dict as enzymedict 
  95  from Bio.Restriction.Restriction_Dictionary import typedict 
  96  from Bio.Restriction.Restriction_Dictionary import suppliers as suppliers_dict 
  97  # TODO: Consider removing this wildcard import. 
  98  from Bio.Restriction.RanaConfig import * 
  99  from Bio.Restriction.PrintFormat import PrintFormat 
 100   
 101  __docformat__ = "restructuredtext en" 
102 103 # Used to use Bio.Restriction.DNAUtils.check_bases (and expose it under this 104 # namespace), but have deprecated that module. 105 106 107 -def _check_bases(seq_string):
108 """Check characters in a string (PRIVATE). 109 110 Remove digits and white space present in string. Allows any valid ambiguous 111 IUPAC DNA single letters codes (ABCDGHKMNRSTVWY, lower case are converted). 112 113 Other characters (e.g. symbols) trigger a TypeError. 114 115 Returns the string WITH A LEADING SPACE (!). This is for backwards 116 compatibility, and may in part be explained by the fact that 117 Bio.Restriction doesn't use zero based counting. 118 """ 119 # Remove white space and make upper case: 120 seq_string = "".join(seq_string.split()).upper() 121 # Remove digits 122 for c in "0123456789": 123 seq_string = seq_string.replace(c, "") 124 # Check only allowed IUPAC letters 125 if not set(seq_string).issubset(set("ABCDGHKMNRSTVWY")): 126 raise TypeError("Invalid character found in %s" % repr(seq_string)) 127 return " " + seq_string
# Lookup table keyed by IUPAC DNA letter.
# NOTE(review): each value appears to list every IUPAC code that shares at
# least one concrete base with the key (e.g. 'R' = A/G is not paired with
# 'Y' = C/T) - confirm against the code that consumes this table.
matching = {
    'A': 'ARWMHVDN',
    'C': 'CYSMHBVN',
    'G': 'GRSKBVDN',
    'T': 'TYWKHBDN',
    'R': 'ABDGHKMNSRWV',
    'Y': 'CBDHKMNSTWVY',
    'W': 'ABDHKMNRTWVY',
    'S': 'CBDGHKMNSRVY',
    'M': 'ACBDHMNSRWVY',
    'K': 'BDGHKNSRTWVY',
    'H': 'ACBDHKMNSRTWVY',
    'B': 'CBDGHKMNSRTWVY',
    'V': 'ACBDGHKMNSRWVY',
    'D': 'ABDGHKMNSRTWVY',
    'N': 'ACBDGHKMNSRTWVY',
}

# Bio.Seq.Seq made available under the name DNA.
DNA = Seq
class FormattedSeq(object):
    """FormattedSeq(seq, [linear=True]) -> new FormattedSeq.

    Wrap a Bio.Seq / Bio.MutableSeq so it can be used with Restriction.

    The sequence string is cleaned by ``_check_bases`` and stored in ``data``
    with a leading space, so the first base sits at index 1 (biological
    numbering) rather than index 0 (Python numbering).

    The shape of the molecule, linear (default) or circular, is kept in
    ``linear``.  For circular sequences restriction sites are also searched
    across the junction between the end and the start of the sequence.
    """

    def __init__(self, seq, linear=True):
        """Initialise from a Seq, a MutableSeq or another FormattedSeq.

        When seq is a FormattedSeq all its attributes (shape included) are
        copied and the linear argument has no effect.

        Raises TypeError when seq is of any other type.
        """
        if isinstance(seq, FormattedSeq):
            self.lower = seq.lower
            self.data = seq.data
            self.linear = seq.linear
            self.alphabet = seq.alphabet
            self.klass = seq.klass
        elif isinstance(seq, (Seq, MutableSeq)):
            stringy = str(seq)
            # Remember the case so slices can be handed back in lower case.
            self.lower = stringy.islower()
            # Note this adds a leading space to the sequence (!)
            self.data = _check_bases(stringy)
            self.linear = linear
            self.klass = seq.__class__
            self.alphabet = seq.alphabet
        else:
            raise TypeError('expected Seq or MutableSeq, got %s' % type(seq))

    def __len__(self):
        """Return the number of bases (the leading space is not counted)."""
        return len(self.data) - 1

    def __repr__(self):
        """Return a string showing the sequence and its shape."""
        return 'FormattedSeq(%s, linear=%s)' % (repr(self[1:]),
                                                repr(self.linear))

    def __eq__(self, other):
        """Return True if other is a FormattedSeq with the same repr()."""
        return isinstance(other, FormattedSeq) and repr(self) == repr(other)

    def __ne__(self, other):
        """Return the negation of ``self == other``.

        Python 2 does not derive ``!=`` from ``__eq__``; without this method
        two equal FormattedSeq would also compare as different there.
        """
        return not self == other

    def circularise(self):
        """FS.circularise() -> circularise FS (in place)."""
        self.linear = False

    def linearise(self):
        """FS.linearise() -> linearise FS (in place)."""
        self.linear = True

    def to_linear(self):
        """FS.to_linear() -> new linear FS instance."""
        new = self.__class__(self)
        new.linear = True
        return new

    def to_circular(self):
        """FS.to_circular() -> new circular FS instance."""
        new = self.__class__(self)
        new.linear = False
        return new

    def is_linear(self):
        """FS.is_linear() -> bool.

        True if the sequence will be analysed as a linear sequence.
        """
        return self.linear

    def finditer(self, pattern, size):
        """FS.finditer(pattern, size) -> list.

        Return a list with one tuple (start, group) per match of pattern in
        the sequence: start is the index of the match in ``data`` (the first
        base has index 1) and group is the ``group`` method of the match
        object.  The latter is used with non palindromic sites.

        pattern is the regular expression corresponding to the enzyme
        restriction site.
        size is the size of the enzyme recognition site.
        """
        if self.is_linear():
            data = self.data
        else:
            # Repeat the first size - 1 bases after the end so that sites
            # spanning the junction of a circular sequence are matched.
            data = self.data + self.data[1:size]
        return [(i.start(), i.group) for i in re.finditer(pattern, data)]

    def __getitem__(self, i):
        """Return ``data[i]`` wrapped in the class of the original sequence.

        The result is lower-cased when the original sequence was entirely
        lower case.
        """
        if self.lower:
            return self.klass((self.data[i]).lower(), self.alphabet)
        return self.klass(self.data[i], self.alphabet)
238
class RestrictionType(type):
    """Type from which all the enzyme classes derive.

    Being a subclass of ``type``, the methods defined here give the enzyme
    classes themselves their operators (+, /, //, %, >>, comparisons, len).
    """

    def __init__(cls, name='', bases=(), dct={}):
        """RE(name, bases, dct) -> RestrictionType instance.

        Not intended to be used in normal operation. The enzymes are
        instantiated when importing the module.

        Replaces the ``compsite`` attribute of the new class by its compiled
        regular expression.

        Raises ValueError if name contains a hyphen or if the compilation
        of ``compsite`` fails.
        """
        if "-" in name:
            raise ValueError("Problem with hyphen in %s as enzyme name"
                             % repr(name))
        # 2011/11/26 - Nobody knows what this call was supposed to accomplish,
        # but all unit tests seem to pass without it.
        # super(RestrictionType, cls).__init__(cls, name, bases, dct)
        try:
            cls.compsite = re.compile(cls.compsite)
        except Exception:
            raise ValueError("Problem with regular expression, re.compiled(%s)"
                             % repr(cls.compsite))

    def __add__(cls, other):
        """RE + other -> RestrictionBatch.

        With another enzyme, return a batch holding both enzymes.
        With a RestrictionBatch, add the enzyme to that batch.
        Anything else raises TypeError.
        """
        if isinstance(other, RestrictionType):
            return RestrictionBatch([cls, other])
        if isinstance(other, RestrictionBatch):
            return other.add_nocheck(cls)
        raise TypeError

    def __div__(cls, other):
        """RE / other (Python 2 division) -> RE.search(other)."""
        return cls.search(other)

    def __rdiv__(cls, other):
        """other / RE (Python 2 division) -> RE.search(other)."""
        return cls.search(other)

    def __truediv__(cls, other):
        """RE / other -> RE.search(other)."""
        return cls.search(other)

    def __rtruediv__(cls, other):
        """other / RE -> RE.search(other)."""
        return cls.search(other)

    def __floordiv__(cls, other):
        """RE // other -> RE.catalyse(other)."""
        return cls.catalyse(other)

    def __rfloordiv__(cls, other):
        """other // RE -> RE.catalyse(other)."""
        return cls.catalyse(other)

    def __str__(cls):
        """str(RE) -> name of the enzyme."""
        return cls.__name__

    def __repr__(cls):
        """repr(RE) -> name of the enzyme.

        Used with eval or exec it gives back the enzyme.
        """
        return "%s" % cls.__name__

    def __len__(cls):
        """len(RE) -> int, the ``size`` attribute of the enzyme."""
        return cls.size

    def __hash__(cls):
        """hash(RE) -> id(RE), in line with the identity based __eq__."""
        return id(cls)

    def __eq__(cls, other):
        """RE == other -> bool.

        True only if RE and other are the very same Python object.
        """
        return cls is other

    def __ne__(cls, other):
        """RE != other -> bool.

        False when other is an enzyme with the same ``charac`` attribute
        (strict isoschizomer: same recognition site, same restriction),
        True in every other case.

        WARNING - This is not the inverse of the __eq__ method.
        """
        if isinstance(other, RestrictionType):
            return not (cls.charac == other.charac)
        return True

    def __rshift__(cls, other):
        """RE >> other -> bool.

        True for a neoschizomer (same ``site``, different ``charac``),
        False for everything else.
        """
        if not isinstance(other, RestrictionType):
            return False
        return bool(cls.site == other.site and cls.charac != other.charac)

    def __mod__(cls, other):
        """a % b -> bool.

        True if a and b have compatible overhangs (delegates to a._mod1).
        Raises TypeError if b is not an enzyme.
        """
        if isinstance(other, RestrictionType):
            return cls._mod1(other)
        raise TypeError(
            'expected RestrictionType, got %s instead' % type(other))

    # The four ordering methods below sort the enzymes on:
    #   1. the size of the recognition site,
    #   2. if the sizes are equal, the alphabetical order of the names.
    # They raise NotImplementedError when other is not an enzyme.

    def __ge__(cls, other):
        """a >= b -> bool (site size first, then name)."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) >= (len(other), other.__name__)

    def __gt__(cls, other):
        """a > b -> bool (site size first, then name)."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) > (len(other), other.__name__)

    def __le__(cls, other):
        """a <= b -> bool (site size first, then name)."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) <= (len(other), other.__name__)

    def __lt__(cls, other):
        """a < b -> bool (site size first, then name)."""
        if not isinstance(other, RestrictionType):
            raise NotImplementedError
        return (len(cls), cls.__name__) < (len(other), other.__name__)
446
class AbstractCut(RestrictionType):
    """Implement the methods that are common to all restriction enzymes.

    All the methods are classmethods.

    For internal use only. Not meant to be instantiated.
    """

    @classmethod
    def search(cls, dna, linear=True):
        """RE.search(dna, linear=True) -> list.

        Return a list of all the sites of RE in dna. Compensate for circular
        sequences and so on.

        dna must be a Bio.Seq.Seq instance, a Bio.Seq.MutableSeq instance or
        a FormattedSeq (in which case linear is not used).

        If linear is False, the restriction sites that span over the
        boundaries will be included.

        The positions are the first base of the 3' fragment,
        i.e. the first base after the position the enzyme will cut.
        """
        #
        # Separating search from _search allows a (very limited) optimisation
        # of the search when using a batch of restriction enzymes.
        # In this case the DNA is formatted once by the class which
        # implements the batch instead of once per enzyme.
        # See RestrictionBatch.search() for example.
        #
        if not isinstance(dna, FormattedSeq):
            dna = FormattedSeq(dna, linear)
        cls.dna = dna
        return cls._search()

    @classmethod
    def all_suppliers(cls):
        """RE.all_suppliers() -> print all the suppliers, one per line."""
        supply = sorted(x[0] for x in suppliers_dict.values())
        print(",\n".join(supply))

    @classmethod
    def is_equischizomer(cls, other):
        """RE.is_equischizomer(other) -> bool.

        True if other is an equischizomer of RE.
        False else.

        equischizomer <=> same site, same position of restriction.
        """
        return not cls != other

    @classmethod
    def is_neoschizomer(cls, other):
        """RE.is_neoschizomer(other) -> bool.

        True if other is a neoschizomer of RE.
        False else.

        neoschizomer <=> same site, different position of restriction.
        """
        return cls >> other

    @classmethod
    def is_isoschizomer(cls, other):
        """RE.is_isoschizomer(other) -> bool.

        True if other is an isoschizomer of RE.
        False else.

        isoschizomer <=> same site.
        """
        return (not cls != other) or cls >> other

    @classmethod
    def equischizomers(cls, batch=None):
        """RE.equischizomers([batch]) -> list.

        Return a sorted list of all the equischizomers of RE, RE itself
        excluded.
        If batch is supplied it is used instead of the default AllEnzymes.

        equischizomer <=> same site, same position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        # Filter RE out by identity instead of list.index()/del, which
        # raised ValueError when the batch did not contain RE.
        return sorted(x for x in batch if x is not cls and not cls != x)

    @classmethod
    def neoschizomers(cls, batch=None):
        """RE.neoschizomers([batch]) -> list.

        Return a sorted list of all the neoschizomers of RE.
        If batch is supplied it is used instead of the default AllEnzymes.

        neoschizomer <=> same site, different position of restriction.
        """
        if not batch:
            batch = AllEnzymes
        return sorted(x for x in batch if cls >> x)

    @classmethod
    def isoschizomers(cls, batch=None):
        """RE.isoschizomers([batch]) -> list.

        Return a sorted list of all the equischizomers and neoschizomers of
        RE, RE itself excluded.
        If batch is supplied it is used instead of the default AllEnzymes.
        """
        if not batch:
            batch = AllEnzymes
        # Same identity filter as in equischizomers(): no ValueError when
        # RE is not a member of the batch.
        return sorted(x for x in batch
                      if x is not cls and ((cls >> x) or (not cls != x)))

    @classmethod
    def frequency(cls):
        """RE.frequency() -> the ``freq`` attribute of the enzyme.

        frequency of the site.
        """
        return cls.freq
569
class NoCut(AbstractCut):
    """Implement the methods specific to the enzymes that do not cut.

    These enzymes are generally enzymes that have been only partially
    characterised and the way they cut the DNA is unknown, or enzymes for
    which the pattern of cut is too complex to be recorded in Rebase
    (ncuts values of 0 in emboss_e.###).

    When using search() with these enzymes the values returned are at the
    start of the restriction site.

    Unknown and NotDefined are also part of the base classes of these
    enzymes; Unknown.catalyse() raises NotImplementedError.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always False here.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always False here.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        For internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence.  The cut position is not
        known for these enzymes, so location is yielded unchanged.
        """
        yield location

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic.  location is yielded unchanged.
        """
        yield location

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        The tuple has the layout (fst5, fst3, scd5, scd3, site):
            fst5 -> first 5' cut (current strand) or None
            fst3 -> first 3' cut (complementary strand) or None
            scd5 -> second 5' cut (current strand) or None
            scd3 -> second 3' cut (complementary strand) or None
            site -> recognition site.
        The four cut positions are always None here.
        """
        return (None, None, None, None, cls.site)
652
class OneCut(AbstractCut):
    """Implement the methods specific to the enzymes that cut the DNA once.

    Correspond to ncuts values of 2 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always True here.
        """
        return True

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always False here.
        """
        return False

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        For internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence.
        The value yielded, location + fst5, is the real place where the
        enzyme will cut.

        example::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.
        """
        yield location + cls.fst5

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic.  Yields location - fst3.
        """
        yield location - cls.fst3

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        The tuple has the layout (fst5, fst3, scd5, scd3, site):
            fst5 -> first 5' cut (current strand)
            fst3 -> first 3' cut (complementary strand)
            scd5 -> second 5' cut (current strand), None here
            scd3 -> second 3' cut (complementary strand), None here
            site -> recognition site.
        """
        return (cls.fst5, cls.fst3, None, None, cls.site)
725
class TwoCuts(AbstractCut):
    """Implement the methods specific to the enzymes that cut the DNA twice.

    Correspond to ncuts values of 4 in emboss_e.###

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def cut_once(cls):
        """RE.cut_once() -> bool.

        True if the enzyme cuts the sequence one time on each strand.
        Always False here.
        """
        return False

    @classmethod
    def cut_twice(cls):
        """RE.cut_twice() -> bool.

        True if the enzyme cuts the sequence twice on each strand.
        Always True here.
        """
        return True

    @classmethod
    def _modify(cls, location):
        """RE._modify(location) -> generator of int.

        For internal use only.

        location is an integer corresponding to the location of the match
        for the enzyme pattern in the sequence.
        Two integers are yielded, one per cutting site:
        location + fst5, then location + scd5.

        example (for an enzyme cutting once)::

            EcoRI pattern : GAATTC
            EcoRI will cut after the G.
            so in the sequence:
                     ______
            GAATACACGGAATTCGA
                     |
                     10
            dna.finditer(GAATTC, 6) will return 10 as G is the 10th base
            EcoRI cut after the G so:
            EcoRI._modify(10) -> 11.
        """
        yield location + cls.fst5
        yield location + cls.scd5

    @classmethod
    def _rev_modify(cls, location):
        """RE._rev_modify(location) -> generator of int.

        For internal use only.

        As _modify, for a site situated on the antiparallel strand when the
        enzyme is not palindromic.
        Yields location - fst3, then location - scd3.
        """
        yield location - cls.fst3
        yield location - cls.scd3

    @classmethod
    def characteristic(cls):
        """RE.characteristic() -> tuple.

        The tuple has the layout (fst5, fst3, scd5, scd3, site):
            fst5 -> first 5' cut (current strand)
            fst3 -> first 3' cut (complementary strand)
            scd5 -> second 5' cut (current strand)
            scd3 -> second 3' cut (complementary strand)
            site -> recognition site.
        """
        return (cls.fst5, cls.fst3, cls.scd5, cls.scd3, cls.site)
800
class Meth_Dep(AbstractCut):
    """Implement the information about methylation.

    Enzymes of this class possess a site which is methylable.
    """

    @classmethod
    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable. Always True here.
        """
        return True
813
class Meth_Undep(AbstractCut):
    """Implement the information about methylation sensitivity.

    Enzymes of this class are not sensitive to methylation.
    """

    @classmethod
    def is_methylable(cls):
        """RE.is_methylable() -> bool.

        True if the recognition site is methylable. Always False here.
        """
        return False
826
class Palindromic(AbstractCut):
    """Implement the methods specific to the enzymes which are palindromic.

    palindromic means : the recognition site and its reverse complement are
                        identical.
    Remarks     : an enzyme with a site CGNNCG is palindromic even if some
                  of the sites that it will recognise are not.
                  for example here : CGAACG

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """RE._search() -> list.

        For internal use only.

        Implement the search for palindromic enzymes: every match of the
        compiled site in ``RE.dna`` is converted to its cut position(s) by
        _modify(); the result is stored in ``RE.results``, passed through
        _drop() when not empty, and returned.
        """
        cuts = []
        for start, _group in cls.dna.finditer(cls.compsite, cls.size):
            cuts.extend(cls._modify(start))
        cls.results = cuts
        if cuts:
            cls._drop()
        # _drop() works on RE.results, so read the attribute back.
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome. Always True here.
        """
        return True
859
class NonPalindromic(AbstractCut):
    """Implement the methods specific to the enzymes which are not palindromic.

    palindromic means : the recognition site and its reverse complement are
                        identical.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def _search(cls):
        """RE._search() -> list.

        For internal use only.

        Implement the search for non palindromic enzymes.  A match whose
        group named after the enzyme is set is converted with _modify();
        any other match is converted with _rev_modify() and also recorded
        in ``RE.on_minus``.  The sorted positions are stored in
        ``RE.results``, passed through _drop() when not empty, and returned.
        """
        matches = cls.dna.finditer(cls.compsite, cls.size)
        cls.results = []
        cls.on_minus = []
        own_name = str(cls)
        for start, group in matches:
            if group(own_name):
                cls.results.extend(cls._modify(start))
            else:
                cls.on_minus.extend(cls._rev_modify(start))
        cls.results.extend(cls.on_minus)
        if cls.results:
            cls.results.sort()
            cls._drop()
        return cls.results

    @classmethod
    def is_palindromic(cls):
        """RE.is_palindromic() -> bool.

        True if the recognition site is a palindrome. Always False here.
        """
        return False
900
class Unknown(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is unknown.

    These enzymes are also NotDefined and NoCut.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        The way these enzymes cut is unknown, so no fragment can be
        produced: NotImplementedError is always raised.

        dna and linear are accepted for compatibility with the catalyse()
        method of the other overhang classes and are not used.
        """
        raise NotImplementedError('%s restriction is unknown.'
                                  % cls.__name__)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end. Always False here.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False here.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False here.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown".
        Always 'unknown' here.
        """
        return 'unknown'

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        list of all the enzymes that share compatible end with RE.
        Always empty here.

        batch is accepted (and ignored) so that this method can be called
        the same way as Blunt.compatible_end().
        """
        return []

    @classmethod
    def _mod1(cls, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Always False here.
        """
        return False
984
class Blunt(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is blunt.

    The enzyme cuts the + strand and the - strand of the DNA at the same
    place.

    Internal use only. Not meant to be instantiated.
    """

    @classmethod
    def catalyse(cls, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly.
        """
        cuts = cls.search(dna, linear)
        seq = cls.dna
        if not cuts:
            # No site: the whole sequence is the only fragment.
            return seq[1:],
        # Fragments lying between two consecutive cut positions.
        between = [seq[cuts[i]:cuts[i + 1]] for i in range(len(cuts) - 1)]
        if seq.is_linear():
            # START of the sequence to FIRST site, the inner fragments,
            # then LAST site to END of the sequence.
            fragments = [seq[1:cuts[0]]] + between + [seq[cuts[-1]:]]
        else:
            # circular : bridge LAST site to FIRST site, then the others.
            fragments = [seq[cuts[-1]:] + seq[1:cuts[0]]] + between
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(cls):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end. Always True here.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()
        """
        return True

    @classmethod
    def is_5overhang(cls):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.
        Always False here.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def is_3overhang(cls):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.
        Always False here.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()
        """
        return False

    @classmethod
    def overhang(cls):
        """RE.overhang() -> str. type of overhang of the enzyme.

        can be "3' overhang", "5' overhang", "blunt", "unknown".
        Always 'blunt' here.
        """
        return 'blunt'

    @classmethod
    def compatible_end(cls, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE, i.e. all the blunt enzymes.
        if batch is not supplied (or is empty) AllEnzymes is used.
        """
        if not batch:
            batch = AllEnzymes
        # Iterate over the requested batch; the original always scanned
        # AllEnzymes and silently ignored the batch argument.
        return sorted(x for x in iter(batch) if x.is_blunt())

    @staticmethod
    def _mod1(other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other:
        True if other is a subclass of Blunt.
        """
        return issubclass(other, Blunt)
1105
class Ov5(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 3'.

    The enzyme cuts the + strand after the - strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        r = self.search(dna, linear)
        # self.dna is only read after search(); presumably search() stores
        # the formatted sequence there -- confirm against the search method.
        d = self.dna
        if not r:
            # no site found: the whole sequence is the only fragment.
            return d[1:],
        length = len(r) - 1
        if d.is_linear():
            #
            #   START of the sequence to FIRST site.
            #
            fragments = [d[1:r[0]]]
            #
            #   fragments between two consecutive sites (none if one site).
            #
            fragments += [d[r[x]:r[x + 1]] for x in range(length)]
            #
            #   LAST site to END of the sequence.
            #
            fragments.append(d[r[-1]:])
        else:
            #
            #   circular : bridge LAST site to FIRST site.
            #
            fragments = [d[r[-1]:] + d[1:r[0]]]
            #
            #   add the others (none if there is only one site).
            #
            fragments += [d[r[x]:r[x + 1]] for x in range(length)]
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True

    @classmethod
    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"   """
        return "5' overhang"

    @classmethod
    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE, i.e. the 5' overhang cutters x of batch for which x % RE
        is true.

        if batch is not given (or is empty) AllEnzymes is screened."""
        if not batch:
            batch = AllEnzymes
        # screen the requested batch; AllEnzymes is only the fallback.
        r = sorted(x for x in iter(batch) if x.is_5overhang() and x % self)
        return r

    @classmethod
    def _mod1(self, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another 5' overhang cutter can be compatible; the comparison of
        the overhangs themselves is delegated to RE._mod2(other)."""
        if issubclass(other, Ov5):
            return self._mod2(other)
        else:
            return False
1228
class Ov3(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is recessed in 5'.

    The enzyme cuts the - strand after the + strand of the DNA.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def catalyse(self, dna, linear=True):
        """RE.catalyse(dna, linear=True) -> tuple of DNA.
        RE.catalyze(dna, linear=True) -> tuple of DNA.

        return a tuple of dna as will be produced by using RE to restrict the
        dna.

        dna must be a Bio.Seq.Seq instance or a Bio.Seq.MutableSeq instance.

        if linear is False, the sequence is considered to be circular and the
        output will be modified accordingly."""
        r = self.search(dna, linear)
        # self.dna is only read after search(); presumably search() stores
        # the formatted sequence there -- confirm against the search method.
        d = self.dna
        if not r:
            # no site found: the whole sequence is the only fragment.
            return d[1:],
        length = len(r) - 1
        if d.is_linear():
            #
            #   START of the sequence to FIRST site.
            #
            fragments = [d[1:r[0]]]
            #
            #   fragments between two consecutive sites (none if one site).
            #
            fragments += [d[r[x]:r[x + 1]] for x in range(length)]
            #
            #   LAST site to END of the sequence.
            #
            fragments.append(d[r[-1]:])
        else:
            #
            #   circular : bridge LAST site to FIRST site.
            #
            fragments = [d[r[-1]:] + d[1:r[0]]]
            #
            #   add the others (none if there is only one site).
            #
            fragments += [d[r[x]:r[x + 1]] for x in range(length)]
        return tuple(fragments)
    catalyze = catalyse

    @classmethod
    def is_blunt(self):
        """RE.is_blunt() -> bool.

        True if the enzyme produces blunt end.

        see also:
            RE.is_3overhang()
            RE.is_5overhang()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_5overhang(self):
        """RE.is_5overhang() -> bool.

        True if the enzyme produces 5' overhang sticky end.

        see also:
            RE.is_3overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_3overhang(self):
        """RE.is_3overhang() -> bool.

        True if the enzyme produces 3' overhang sticky end.

        see also:
            RE.is_5overhang()
            RE.is_blunt()
            RE.is_unknown()"""
        return True

    @classmethod
    def overhang(self):
        """RE.overhang() -> str. type of overhang of the enzyme.,

        can be "3' overhang", "5' overhang", "blunt", "unknown"   """
        return "3' overhang"

    @classmethod
    def compatible_end(self, batch=None):
        """RE.compatible_end([batch]) -> list.

        sorted list of all the enzymes of batch that share compatible end
        with RE, i.e. the 3' overhang cutters x of batch for which x % RE
        is true.

        if batch is not given (or is empty) AllEnzymes is screened."""
        if not batch:
            batch = AllEnzymes
        # screen the requested batch; AllEnzymes is only the fallback.
        r = sorted(x for x in iter(batch) if x.is_3overhang() and x % self)
        return r

    @classmethod
    def _mod1(self, other):
        """RE._mod1(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.
        Only another 3' overhang cutter can be compatible; the comparison of
        the overhangs themselves is delegated to RE._mod2(other)."""
        if issubclass(other, Ov3):
            return self._mod2(other)
        else:
            return False
1354
class Defined(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    and the cut are not variable.

    Typical example : EcoRI -> G^AATT_C
                      The overhang will always be AATT
    Notes:
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _drop(self):
        """RE._drop() -> None.

        for internal use only.

        drop the sites that are situated outside the sequence in linear
        sequence. modify the index for site in circular sequences.

        self.results is updated, nothing is returned."""
        #
        #   The cut positions are obtained by adding the distance from the
        #   site to the cut, so some of them can fall outside the sequence.
        #   Linear sequence   : such positions are removed altogether.
        #   Circular sequence : the site is in the sequence, so the position
        #                       is wrapped around rather than dropped.
        #
        size = len(self.dna)
        if self.dna.is_linear():
            kept = list(itertools.dropwhile(lambda pos: pos < 1,
                                            self.results))
            self.results = kept
            kept = list(itertools.takewhile(lambda pos: pos < size,
                                            self.results))
            self.results = kept
            return
        # leading positions before the start of the sequence.
        for i, pos in enumerate(self.results):
            if not pos < 1:
                break
            self.results[i] += size
        # trailing positions after the end of the sequence.
        for i, pos in enumerate(self.results[::-1]):
            if not pos > size:
                break
            self.results[-(i + 1)] -= size
        return

    @classmethod
    def is_defined(self):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        see also:
            RE.is_ambiguous()
            RE.is_unknown()"""
        return True

    @classmethod
    def is_ambiguous(self):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        see also:
            RE.is_defined()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_unknown(self):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also:
            RE.is_defined()
            RE.is_ambiguous()"""
        return False

    @classmethod
    def elucidate(self):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        f5 = self.fst5
        f3 = self.fst3
        site = self.site
        if self.cut_twice():
            return 'cut twice, not yet implemented sorry.'
        if self.is_5overhang():
            if f5 == f3 == 0:
                return 'N^' + self.site + '_N'
            if f3 == 0:
                return site[:f5] + '^' + site[f5:] + '_N'
            return site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
        if self.is_blunt():
            return site[:f5] + '^_' + site[f5:]
        # remaining case: neither 5' overhang nor blunt.
        if f5 == f3 == 0:
            return 'N_' + site + '^N'
        return site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]

    @classmethod
    def _mod2(self, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.

        True when both overhang sequences are equal. If they differ and
        other is an Ambiguous enzyme the decision is left to
        other._mod2(RE)."""
        if other.ovhgseq == self.ovhgseq:
            return True
        if issubclass(other, Ambiguous):
            return other._mod2(self)
        return False
1495
class Ambiguous(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is variable.

    Typical example : BstXI -> CCAN_NNNN^NTGG
                      The overhang can be any sequence of 4 bases.
    Notes:
        Blunt enzymes are always defined, even if their site is GGATCCNNN^_N
        Their overhang is always the same : blunt!

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _drop(self):
        """RE._drop() -> None.

        for internal use only.

        drop the sites that are situated outside the sequence in linear
        sequence. modify the index for site in circular sequences.

        self.results is updated, nothing is returned."""
        length = len(self.dna)
        drop = itertools.dropwhile
        take = itertools.takewhile
        if self.dna.is_linear():
            # linear: discard the positions outside the sequence.
            self.results = [x for x in drop(lambda x: x < 1, self.results)]
            self.results = [x for x in take(lambda x: x < length,
                                            self.results)]
        else:
            # circular: wrap the leading positions located before the start.
            for index, location in enumerate(self.results):
                if location < 1:
                    self.results[index] += length
                else:
                    break
            # circular: wrap the trailing positions located after the end.
            for index, location in enumerate(self.results[::-1]):
                if location > length:
                    self.results[-(index + 1)] -= length
                else:
                    break
        return

    @classmethod
    def is_defined(self):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        see also:
            RE.is_ambiguous()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_ambiguous(self):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        see also:
            RE.is_defined()
            RE.is_unknown()"""
        return True

    @classmethod
    def is_unknown(self):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also:
            RE.is_defined()
            RE.is_ambiguous()"""
        return False

    @classmethod
    def _mod2(self, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.

        The overhang of RE is turned into a regular expression which is
        matched against the overhang of other. Overhangs of different
        lengths are never compatible."""
        #
        #   called by RE._mod1(other) when the one of the enzyme is ambiguous
        #
        if len(self.ovhgseq) != len(other.ovhgseq):
            return False
        else:
            se = self.ovhgseq
            # NOTE(review): the loop walks the original overhang while se is
            # rewritten in place, so text inserted by an earlier expansion can
            # be rewritten again by a later base. Kept exactly as it was since
            # the result depends on the content of matching -- confirm.
            for base in se:
                if base in 'ATCG':
                    pass
                if base in 'N':
                    se = '.'.join(se.split('N'))
                if base in 'RYWMSKHDBV':
                    expand = '[' + matching[base] + ']'
                    se = expand.join(se.split(base))
            if re.match(se, other.ovhgseq):
                return True
            else:
                return False

    @classmethod
    def elucidate(self):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>

        raise a ValueError for a blunt cutter whose cut lies inside the
        site (0 <= fst5 <= len(RE))."""
        f5 = self.fst5
        f3 = self.fst3
        length = len(self)
        site = self.site
        # the result is named rep: re would hide the re module in here.
        if self.cut_twice():
            rep = 'cut twice, not yet implemented sorry.'
        elif self.is_5overhang():
            if f3 == f5 == 0:
                rep = 'N^' + site + '_N'
            elif 0 <= f5 <= length and 0 <= f3 + length <= length:
                rep = site[:f5] + '^' + site[f5:f3] + '_' + site[f3:]
            elif 0 <= f5 <= length:
                rep = site[:f5] + '^' + site[f5:] + f3 * 'N' + '_N'
            elif 0 <= f3 + length <= length:
                rep = 'N^' + abs(f5) * 'N' + site[:f3] + '_' + site[f3:]
            elif f3 + length < 0:
                # 'N^' followed by abs(f5) N: multiplying the two strings
                # together (as was done before) raises a TypeError.
                rep = ('N^' + abs(f5) * 'N' + '_' +
                       abs(length + f3) * 'N' + site)
            elif f5 > length:
                rep = (site + (f5 - length) * 'N' + '^' +
                       (length + f3 - f5) * 'N' + '_N')
            else:
                rep = 'N^' + abs(f5) * 'N' + site + f3 * 'N' + '_N'
        elif self.is_blunt():
            if f5 < 0:
                rep = 'N^_' + abs(f5) * 'N' + site
            elif f5 > length:
                rep = site + (f5 - length) * 'N' + '^_N'
            else:
                raise ValueError('%s.easyrepr() : error f5=%i'
                                 % (self.name, f5))
        else:
            if f3 == 0:
                if f5 == 0:
                    rep = 'N_' + site + '^N'
                else:
                    rep = site + '_' + (f5 - length) * 'N' + '^N'
            elif 0 < f3 + length <= length and 0 <= f5 <= length:
                rep = site[:f3] + '_' + site[f3:f5] + '^' + site[f5:]
            elif 0 < f3 + length <= length:
                rep = site[:f3] + '_' + site[f3:] + (f5 - length) * 'N' + '^N'
            elif 0 <= f5 <= length:
                rep = 'N_' + 'N' * (f3 + length) + site[:f5] + '^' + site[f5:]
            elif f3 > 0:
                rep = (site + f3 * 'N' + '_' +
                       (f5 - f3 - length) * 'N' + '^N')
            elif f5 < 0:
                rep = ('N_' + abs(f3 - f5 + length) * 'N' + '^' +
                       abs(f5) * 'N' + site)
            else:
                rep = ('N_' + abs(f3 + length) * 'N' + site +
                       (f5 - length) * 'N' + '^N')
        return rep
1666
class NotDefined(AbstractCut):
    """Implement the methods specific to the enzymes for which the overhang
    is not characterised.

    Correspond to NoCut and Unknown.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def _drop(self):
        """RE._drop() -> None.

        for internal use only.

        For a linear sequence nothing is done. For a circular sequence the
        positions located outside the sequence are wrapped around.

        self.results is updated, nothing is returned."""
        if self.dna.is_linear():
            return
        else:
            length = len(self.dna)
            # wrap the leading positions located before the start.
            for index, location in enumerate(self.results):
                if location < 1:
                    self.results[index] += length
                else:
                    break
            # wrap the trailing positions located after the end. The list
            # must be walked backwards ([::-1]) because the entries are
            # addressed from the end with -(index + 1).
            for index, location in enumerate(self.results[::-1]):
                if location > length:
                    self.results[-(index + 1)] -= length
                else:
                    break
        return

    @classmethod
    def is_defined(self):
        """RE.is_defined() -> bool.

        True if the sequence recognised and cut is constant,
        i.e. the recognition site is not degenerated AND the enzyme cut inside
        the site.

        see also:
            RE.is_ambiguous()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_ambiguous(self):
        """RE.is_ambiguous() -> bool.

        True if the sequence recognised and cut is ambiguous,
        i.e. the recognition site is degenerated AND/OR the enzyme cut outside
        the site.

        see also:
            RE.is_defined()
            RE.is_unknown()"""
        return False

    @classmethod
    def is_unknown(self):
        """RE.is_unknown() -> bool.

        True if the sequence is unknown,
        i.e. the recognition site has not been characterised yet.

        see also:
            RE.is_defined()
            RE.is_ambiguous()"""
        return True

    @classmethod
    def _mod2(self, other):
        """RE._mod2(other) -> bool.

        for internal use only

        test for the compatibility of restriction ending of RE and other.

        always raise a ValueError: the overhang is not defined, so no
        compatibility can be computed."""
        #
        #   Normally we should not arrive here. But well better safe than
        #   sorry. The overhang is not defined, we are compatible with nobody.
        #   An error is raised rather than quietly returning False.
        #
        raise ValueError("%s.mod2(%s), %s : NotDefined. pas glop pas glop!"
                         % (str(self), str(other), str(self)))

    @classmethod
    def elucidate(self):
        """RE.elucidate() -> str

        return a representation of the site with the cut on the (+) strand
        represented as '^' and the cut on the (-) strand as '_'.
        ie:
        >>> EcoRI.elucidate()   # 5' overhang
        'G^AATT_C'
        >>> KpnI.elucidate()    # 3' overhang
        'G_GTAC^C'
        >>> EcoRV.elucidate()   # blunt
        'GAT^_ATC'
        >>> SnaI.elucidate()    # NotDefined, cut profile unknown.
        '? GTATAC ?'
        >>>
        """
        return '? %s ?' % self.site
1772
class Commercially_available(AbstractCut):
    #
    #   Recent addition to Rebase make this naming convention uncertain.
    #   May be better to says enzymes which have a supplier.
    #
    """Implement the methods specific to the enzymes which are commercially
    available.

    Internal use only. Not meant to be instantiated."""

    @classmethod
    def suppliers(self):
        """RE.suppliers() -> print the suppliers of RE.

        print one supplier name per line, each followed by a comma.
        return None."""
        # suppliers_dict is a module level dictionary, not a method of the
        # enzyme: the names are obtained through RE.supplier_list().
        for s in self.supplier_list():
            print(s + ',')
        return

    @classmethod
    def supplier_list(self):
        """RE.supplier_list() -> list.

        list of the supplier names for RE, i.e. the first item of every
        suppliers_dict entry whose key is found in RE.suppl."""
        return [v[0] for k, v in suppliers_dict.items() if k in self.suppl]

    @classmethod
    def buffers(self, supplier):
        """RE.buffers(supplier) -> string.

        not implemented yet. Currently return None."""
        return

    @classmethod
    def is_comm(self):
        """RE.iscomm() -> bool.

        True if RE has suppliers."""
        return True
1811
class Not_available(AbstractCut):
    """Implement the methods specific to the enzymes which are not commercially
    available.

    Internal use only. Not meant to be instantiated."""

    @staticmethod
    def suppliers():
        """RE.suppliers() -> None.

        The enzyme has no supplier: nothing is printed."""
        return None

    @classmethod
    def supplier_list(self):
        """RE.supplier_list() -> list.

        list of the supplier names for RE, which is always empty here."""
        return list()

    @classmethod
    def buffers(self, supplier):
        """RE.buffers(supplier) -> string.

        not implemented yet. Always raise a TypeError since the enzyme is
        not commercially available."""
        raise TypeError("Enzyme not commercially available.")

    @classmethod
    def is_comm(self):
        """RE.iscomm() -> bool.

        True if RE has suppliers, so always False here."""
        return False
1844
###############################################################################
#                                                                             #
#                       Restriction Batch                                     #
#                                                                             #
###############################################################################


class RestrictionBatch(set):
    """Set of restriction enzymes (RestrictionType classes).

    The batch remembers the supplier codes used to build it (suppliers) and
    caches the result of the last search (mapping, already_mapped)."""

    def __init__(self, first=(), suppliers=()):
        """RestrictionBatch([sequence]) -> new RestrictionBatch.

        first is an iterable of enzymes, or of names which can be evaluated
        to enzymes. suppliers is an iterable of supplier codes (keys of
        suppliers_dict); all the enzymes of these suppliers are added.

        raise a ValueError if an item of first is not an enzyme and a
        KeyError if a supplier code is unknown."""
        first = [self.format(x) for x in first]
        # the names evaluated here come from suppliers_dict, not from the
        # caller.
        first += [eval(x) for n in suppliers for x in suppliers_dict[n][1]]
        set.__init__(self, first)
        self.mapping = dict.fromkeys(self)
        self.already_mapped = None
        # needed by add_supplier() and current_suppliers().
        self.suppliers = list(suppliers)

    def __str__(self):
        names = self.elements()
        if len(self) < 5:
            return '+'.join(names)
        else:
            # long batch: only show the first two and the last two names.
            return '...'.join(('+'.join(names[:2]), '+'.join(names[-2:])))

    def __repr__(self):
        return 'RestrictionBatch(%s)' % self.elements()

    def __contains__(self, other):
        try:
            other = self.format(other)
        except ValueError:  # other is not a restriction enzyme
            return False
        return set.__contains__(self, other)

    def __div__(self, other):
        """b / other -> search other with all the enzymes of b."""
        return self.search(other)

    def __rdiv__(self, other):
        """other / b -> search other with all the enzymes of b."""
        return self.search(other)

    # the / operator is looked up under these names when true division is
    # in effect (always the case with Python 3).
    __truediv__ = __div__
    __rtruediv__ = __rdiv__

    def get(self, enzyme, add=False):
        """B.get(enzyme[, add]) -> enzyme class.

        if add is True and enzyme is not in B add enzyme to B.
        if add is False (which is the default) only return enzyme.
        if enzyme is not a RestrictionType or can not be evaluated to
        a RestrictionType, raise a ValueError.
        raise a ValueError as well if enzyme is not in B and add is False."""
        e = self.format(enzyme)
        if e in self:
            return e
        elif add:
            self.add(e)
            return e
        else:
            raise ValueError('enzyme %s is not in RestrictionBatch'
                             % e.__name__)

    def lambdasplit(self, func):
        """B.lambdasplit(func) -> RestrictionBatch .

        the new batch will contains only the enzymes for which
        func return True."""
        new = RestrictionBatch()
        # the elements come from B: no need to check their type again.
        for enzyme in filter(func, self):
            new.add_nocheck(enzyme)
        return new

    def add_supplier(self, letter):
        """B.add_supplier(letter) -> add a new set of enzyme to B.

        letter represents the suppliers as defined in the dictionary
        RestrictionDictionary.suppliers
        return None.
        raise a KeyError if letter is not a supplier code."""
        supplier = suppliers_dict[letter]
        self.suppliers.append(letter)
        for x in supplier[1]:
            self.add_nocheck(eval(x))
        return

    def current_suppliers(self):
        """B.current_suppliers() -> list.

        return a sorted list of the suppliers which have been used to
        create the batch."""
        suppl_list = sorted(suppliers_dict[x][0] for x in self.suppliers)
        return suppl_list

    def __iadd__(self, other):
        """ b += other -> add other to b, check the type of other."""
        self.add(other)
        return self

    def __add__(self, other):
        """ b + other -> new RestrictionBatch."""
        new = self.__class__(self)
        new.add(other)
        return new

    def remove(self, other):
        """B.remove(other) -> remove other from B if other is a RestrictionType.

        Safe set.remove method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        raise a KeyError if other is not in B."""
        return set.remove(self, self.format(other))

    def add(self, other):
        """B.add(other) -> add other to B if other is a RestrictionType.

        Safe set.add method. Verify that other is a RestrictionType or can be
        evaluated to a RestrictionType.
        raise a ValueError if other can not be evaluated to a RestrictionType.
        """
        return set.add(self, self.format(other))

    def add_nocheck(self, other):
        """B.add_nocheck(other) -> add other to B. don't check type of other.
        """
        return set.add(self, other)

    def format(self, y):
        """B.format(y) -> RestrictionType or raise ValueError.

        if y is a RestrictionType return y
        if y can be evaluated to a RestrictionType return eval(y)
        raise a Value Error in all other case."""
        try:
            if isinstance(y, RestrictionType):
                return y
            # NOTE(security): eval() runs arbitrary expressions, never feed
            # untrusted text to this method. The text is evaluated only once
            # and always as a string.
            candidate = eval(str(y))
            if isinstance(candidate, RestrictionType):
                return candidate
        except (NameError, SyntaxError):
            pass
        raise ValueError('%s is not a RestrictionType' % y.__class__)

    def is_restriction(self, y):
        """B.is_restriction(y) -> bool.

        True is y or eval(y) is a RestrictionType.
        False as well when y can not be evaluated at all."""
        if isinstance(y, RestrictionType):
            return True
        try:
            # NOTE(security): see format(), eval() must not get untrusted
            # text.
            return isinstance(eval(str(y)), RestrictionType)
        except (NameError, SyntaxError):
            return False

    def split(self, *classes, **bool):
        """B.split(class, [class.__name__ = True]) -> new RestrictionBatch.

        keep the enzymes which are a subclass of every class given, or which
        are NOT a subclass of it when class.__name__=False is given.

        it works but it is slow, so it has really an interest when splitting
        over multiple conditions."""
        def splittest(element):
            """True if element fulfils the condition set for every class."""
            for klass in classes:
                b = bool.get(klass.__name__, True)
                if issubclass(element, klass):
                    if not b:
                        return False
                elif b:
                    return False
            return True

        new = RestrictionBatch()
        # the elements come from B: no need to check their type again.
        for enzyme in filter(splittest, self):
            new.add_nocheck(enzyme)
        return new

    def elements(self):
        """B.elements() -> list.

        give all the names of the enzymes in B sorted alphabetically."""
        l = sorted(str(e) for e in self)
        return l

    def as_string(self):
        """B.as_string() -> list.

        return a list of the name of the elements of B."""
        return [str(e) for e in self]

    @classmethod
    def suppl_codes(self):
        """B.suppl_codes() -> dict

        letter code for the suppliers"""
        supply = dict((k, v[0]) for k, v in suppliers_dict.items())
        return supply

    @classmethod
    def show_codes(self):
        """B.show_codes() -> letter codes for the suppliers"""
        supply = [' = '.join(i) for i in self.suppl_codes().items()]
        print('\n'.join(supply))
        return

    def search(self, dna, linear=True):
        """B.search(dna) -> dict.

        search dna with every enzyme of B and return a dictionary
        {enzyme : result of enzyme.search}. The result is cached: searching
        the same sequence text with the same topology again returns the
        stored dictionary.

        raise a TypeError if dna is neither a DNA nor a FormattedSeq."""
        #
        #   here we replace the search method of the individual enzymes
        #   with one unique testing method.
        #
        if not hasattr(self, "already_mapped"):
            # TODO - Why does this happen!
            # Try the "doctest" at the start of PrintFormat.py
            self.already_mapped = None
        if isinstance(dna, DNA):
            # For the searching, we just care about the sequence as a string,
            # if that is the same we can use the cached search results.
            # At the time of writing, Seq == method isn't implemented,
            # and therefore does object identity which is stricter.
            if (str(dna), linear) == self.already_mapped:
                return self.mapping
            else:
                self.already_mapped = str(dna), linear
                fseq = FormattedSeq(dna, linear)
                self.mapping = dict((x, x.search(fseq)) for x in self)
                return self.mapping
        elif isinstance(dna, FormattedSeq):
            # a FormattedSeq carries its own topology: linear is ignored.
            if (str(dna), dna.linear) == self.already_mapped:
                return self.mapping
            else:
                self.already_mapped = str(dna), dna.linear
                self.mapping = dict((x, x.search(dna)) for x in self)
                return self.mapping
        raise TypeError("Expected Seq or MutableSeq instance, got %s instead"
                        % type(dna))
2074
2075 ############################################################################### 2076 # # 2077 # Restriction Analysis # 2078 # # 2079 ############################################################################### 2080 2081 2082 -class Analysis(RestrictionBatch, PrintFormat):
2083
def __init__(self, restrictionbatch=None, sequence=None,
             linear=True):
    """Analysis([restrictionbatch [, sequence] linear=True]) -> New Analysis class.

    For most of the method of this class if a dictionary is given it will
    be used as the base to calculate the results.
    If no dictionary is given a new analysis using the Restriction Batch
    which has been given when the Analysis class has been instantiated.

    restrictionbatch defaults to a new empty RestrictionBatch and sequence
    to an empty DNA. The sequence is searched at once if it is not empty."""
    # the defaults are built here, for each call: a default written in the
    # signature is created only once and the batch, which is kept in
    # self.rb, would be shared by all the analyses built without argument.
    if restrictionbatch is None:
        restrictionbatch = RestrictionBatch()
    if sequence is None:
        sequence = DNA('')
    RestrictionBatch.__init__(self, restrictionbatch)
    self.rb = restrictionbatch
    self.sequence = sequence
    self.linear = linear
    if self.sequence:
        self.search(self.sequence, self.linear)
2098
def __repr__(self):
    """Return 'Analysis(batch,sequence,linear)' built from the repr of
    self.rb and self.sequence and from self.linear."""
    parts = (repr(self.rb), repr(self.sequence), self.linear)
    return 'Analysis(%s,%s,%s)' % parts
2102
def _sub_set(self, wanted):
    """A._sub_set(other_set) -> dict.

    Internal use only.

    screen self.mapping through the wanted set: only the entries whose
    enzyme is in wanted are kept.
    """
    selected = {}
    for enzyme, sites in self.mapping.items():
        if enzyme in wanted:
            selected[enzyme] = sites
    return selected
2112
def _boundaries(self, start, end):
    """A._boundaries(start, end) -> tuple.

    Format the boundaries for use with the methods that limit the
    search to only part of the sequence given to analyse.

    A boundary lower than 1 is shifted by the length of the sequence.
    return (start, end, test) where test is self._test_normal if
    start < end and self._test_reverse otherwise.
    raise a TypeError if start or end is not an int.
    """
    if not isinstance(start, int):
        raise TypeError('expected int, got %s instead' % type(start))
    if not isinstance(end, int):
        raise TypeError('expected int, got %s instead' % type(end))
    if start < 1:
        start += len(self.sequence)
    if end < 1:
        end += len(self.sequence)
    # NOTE(review): two statements used to stand here for start >= end,
    # 'start, end == end, start' and 'start == 1'. Both are comparisons
    # whose result was thrown away, so they never changed anything and
    # have been removed. If a swap and a clamp were intended, adding them
    # would change which test is returned below -- confirm the intent
    # before doing so.
    if start < end:
        return start, end, self._test_normal
    else:
        return start, end, self._test_reverse
2137
def _test_normal(self, start, end, site):
    """A._test_normal(start, end, site) -> bool.

    Internal use only
    True if site is at or after start and strictly before end.
    """
    inside = start <= site < end
    return inside
2145
def _test_reverse(self, start, end, site):
    """A._test_reverse(start, end, site) -> bool.

    Internal use only
    Test if site is in between end and start (for circular sequences):
    True if site lies between start and the end of the sequence, or
    between 1 and end (end excluded).
    """
    if start <= site <= len(self.sequence):
        return True
    return 1 <= site < end
2153
def print_that(self, dct=None, title='', s1=''):
    """A.print_that([dct[, title[, s1]]]) -> print the results from dct.

    If dct is not given (or is empty) the full dictionary self.mapping is
    used. An empty line is printed first, the rest of the output is left
    to PrintFormat.print_that whose result is returned.
    """
    results = dct if dct else self.mapping
    print("")
    return PrintFormat.print_that(self, results, title, s1)
2163
def change(self, **what):
    """A.change(**attribute_name) -> Change attribute of Analysis.

    It is possible to change the width of the shell by setting
    self.ConsoleWidth to what you want.
    self.NameWidth refer to the maximal length of the enzyme name.

    Changing one of these parameters here might not give the results
    you expect. In which case, you can settle back to a 80 columns shell
    or try to change self.Cmodulo and self.PrefWidth in PrintFormat until
    you get it right.

    Recognised keywords:
        NameWidth, ConsoleWidth -- set, then Cmodulo and PrefWidth are
                                   recomputed from them.
        sequence, linear        -- set, then self.search() is re-run.
        rb                      -- the instance is re-initialised with
                                   the new batch.
        Indent, Maxsize         -- set as-is.

    Raises AttributeError for Cmodulo/PrefWidth (derived values) and for
    any unknown keyword.
    """
    for k, v in what.items():
        if k in ('NameWidth', 'ConsoleWidth'):
            setattr(self, k, v)
            self.Cmodulo = self.ConsoleWidth % self.NameWidth
            self.PrefWidth = self.ConsoleWidth - self.Cmodulo
        # Strings are compared by value: identity ("is") only works by
        # accident of interning.
        elif k == 'sequence':
            setattr(self, 'sequence', v)
            self.search(self.sequence, self.linear)
        elif k == 'rb':
            # __init__ returns None, so its result must not be bound to
            # self: later keywords in the same call still need the instance.
            Analysis.__init__(self, v, self.sequence, self.linear)
        elif k == 'linear':
            setattr(self, 'linear', v)
            self.search(self.sequence, v)
        elif k in ('Indent', 'Maxsize'):
            setattr(self, k, v)
        elif k in ('Cmodulo', 'PrefWidth'):
            raise AttributeError(
                'To change %s, change NameWidth and/or ConsoleWidth'
                % k)
        else:
            raise AttributeError(
                'Analysis has no attribute %s' % k)
    return
2198
def full(self, linear=True):
    """A.full() -> dict.

    Full restriction map of the sequence: self.mapping is returned as-is
    (the same object, not a copy).  The linear argument is accepted but
    not consulted here.
    """
    whole_map = self.mapping
    return whole_map
2204
def blunt(self, dct=None):
    """A.blunt([dct]) -> dict.

    Only the enzymes for which is_blunt() is true, i.e. those with a
    blunt restriction site.

    dct -- optional dict of enzyme -> sites to filter.  When omitted
           (None) self.mapping is used.  An explicitly passed empty dict
           is filtered as-is, so chaining filters never silently falls
           back to the full mapping.
    """
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if k.is_blunt())
2212
def overhang5(self, dct=None):
    """A.overhang5([dct]) -> dict.

    Only the enzymes for which is_5overhang() is true, i.e. those with a
    5' overhang restriction site.

    dct -- optional dict of enzyme -> sites to filter.  When omitted
           (None) self.mapping is used; an explicitly passed empty dict
           is filtered as-is and yields an empty result.
    """
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if k.is_5overhang())
2220
def overhang3(self, dct=None):
    """A.overhang3([dct]) -> dict.

    Only the enzymes for which is_3overhang() is true, i.e. those with a
    3' overhang restriction site.

    dct -- optional dict of enzyme -> sites to filter.  When omitted
           (None) self.mapping is used; an explicitly passed empty dict
           is filtered as-is and yields an empty result.
    """
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if k.is_3overhang())
2228
def defined(self, dct=None):
    """A.defined([dct]) -> dict.

    Only the enzymes for which is_defined() is true, i.e. those that
    have a defined restriction site in Rebase.

    dct -- optional dict of enzyme -> sites to filter.  When omitted
           (None) self.mapping is used; an explicitly passed empty dict
           is filtered as-is and yields an empty result.
    """
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if k.is_defined())
2236
def with_sites(self, dct=None):
    """A.with_sites([dct]) -> dict.

    Enzymes which have at least one site in the sequence (entries whose
    value is non-empty).

    dct -- optional dict of enzyme -> sites to filter.  When omitted
           (None) self.mapping is used; an explicitly passed empty dict
           is filtered as-is and yields an empty result.
    """
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if v)
2244
def without_site(self, dct=None):
    """A.without_site([dct]) -> dict.

    Enzymes which have no site in the sequence (entries whose value is
    empty).

    dct -- optional dict of enzyme -> sites to filter.  When omitted
           (None) self.mapping is used; an explicitly passed empty dict
           is filtered as-is and yields an empty result.
    """
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if not v)
2252
def with_N_sites(self, N, dct=None):
    """A.with_N_sites(N [, dct]) -> dict.

    Enzymes which cut the sequence exactly N times.

    N   -- required number of sites.
    dct -- optional dict of enzyme -> sites to filter.  When omitted
           (None) self.mapping is used; an explicitly passed empty dict
           is filtered as-is and yields an empty result.
    """
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if len(v) == N)
2260
def with_number_list(self, list, dct=None):
    """A.with_number_list(list [, dct]) -> dict.

    Enzymes whose number of sites is one of the values in list.

    list -- container of accepted site counts (the parameter name
            shadows the builtin; it is kept for keyword-argument
            compatibility).
    dct  -- optional dict of enzyme -> sites to filter.  When omitted
            (None) self.mapping is used; an explicitly passed empty dict
            is filtered as-is and yields an empty result.
    """
    if dct is None:
        dct = self.mapping
    return dict((k, v) for k, v in dct.items() if len(v) in list)
2265
def with_name(self, names, dct=None):
    """A.with_name(list_of_names [, dct]) -> dict.

    Limit the search to the enzymes named in list_of_names.

    names -- iterable of enzymes.  Entries not found in AllEnzymes are
             reported on stdout and ignored.  The caller's object is not
             modified.
    dct   -- optional dict of enzyme -> sites.  When omitted (None) a
             fresh search of self.sequence is run with the known
             enzymes; otherwise the entries of dct for those enzymes are
             returned.
    """
    # Build a filtered copy: deleting from `names` while enumerating it
    # skips the element following each deletion.
    known = []
    for enzyme in names:
        if enzyme in AllEnzymes:
            known.append(enzyme)
        else:
            print("no data for the enzyme: %s" % enzyme)
    if dct is None:
        return RestrictionBatch(known).search(self.sequence)
    return dict((n, dct[n]) for n in known if n in dct)
2277
def with_site_size(self, site_size, dct=None):
    """A.with_site_size(site_size [, dct]) -> dict.

    Limit the search to the enzymes whose site is of size <site_size>.

    site_size -- required value of the enzyme's size attribute.
    dct       -- optional dict of enzyme -> sites.  When omitted (None) a
                 fresh search of self.sequence is run with the matching
                 enzymes of this batch; otherwise the entries of dct
                 whose enzyme matches are returned.
    """
    sites = [name for name in self if name.size == site_size]
    if dct is None:
        return RestrictionBatch(sites).search(self.sequence)
    # Membership is tested against the selected enzymes, not against the
    # integer site_size (which raised TypeError).
    return dict((k, v) for k, v in dct.items() if k in sites)
2286
def only_between(self, start, end, dct=None):
    """A.only_between(start, end[, dct]) -> dict.

    Enzymes that cut the sequence only in between start and end:
    they have at least one site and every site passes the boundary test
    returned by self._boundaries(start, end).

    dct -- optional dict of enzyme -> sites to filter.  When omitted
           (None) self.mapping is used; an explicitly passed empty dict
           is filtered as-is and yields an empty result.
    """
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    return dict((key, sites) for key, sites in dct.items()
                if sites and all(test(start, end, site) for site in sites))
2306
def between(self, start, end, dct=None):
    """A.between(start, end [, dct]) -> dict.

    Enzymes that cut the sequence at least in between start and end.
    They may cut outside as well.  The full list of sites is kept for
    every selected enzyme.

    dct -- optional dict of enzyme -> sites to filter.  When omitted
           (None) self.mapping is used; an explicitly passed empty dict
           is filtered as-is and yields an empty result.
    """
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    return dict((key, sites) for key, sites in dct.items()
                if any(test(start, end, site) for site in sites))
2323
def show_only_between(self, start, end, dct=None):
    """A.show_only_between(start, end [, dct]) -> dict.

    Enzymes selected by self.between(start, end, dct), i.e. that cut at
    least once inside the region, with their site lists reduced to the
    positions inside the region (both ends included).

    When start > end the region wraps around the end of the sequence:
    positions >= start or <= end are kept.
    """
    if start <= end:
        def inside(site):
            return start <= site <= end
    else:
        def inside(site):
            return start <= site or site <= end
    # Iterate over (enzyme, sites) pairs; iterating the dict itself only
    # yields the enzymes.
    return dict((k, [vv for vv in v if inside(vv)])
                for k, v in self.between(start, end, dct).items())
2337
def only_outside(self, start, end, dct=None):
    """A.only_outside(start, end [, dct]) -> dict.

    Enzymes that cut the sequence outside of the region
    in between start and end but do not cut inside: they have at least
    one site and no site passes the boundary test returned by
    self._boundaries(start, end).

    dct -- optional dict of enzyme -> sites to filter.  When omitted
           (None) self.mapping is used; an explicitly passed empty dict
           is filtered as-is and yields an empty result.
    """
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    return dict((key, sites) for key, sites in dct.items()
                if sites and not any(test(start, end, site) for site in sites))
2358
def outside(self, start, end, dct=None):
    """A.outside(start, end [, dct]) -> dict.

    Enzymes that cut outside the region in between start and end: at
    least one of their sites fails the boundary test returned by
    self._boundaries(start, end).
    No test is made to know if they cut or not inside this region.

    dct -- optional dict of enzyme -> sites to filter.  When omitted
           (None) self.mapping is used; an explicitly passed empty dict
           is filtered as-is and yields an empty result.
    """
    start, end, test = self._boundaries(start, end)
    if dct is None:
        dct = self.mapping
    return dict((key, sites) for key, sites in dct.items()
                if not all(test(start, end, site) for site in sites))
2376
def do_not_cut(self, start, end, dct=None):
    """A.do_not_cut(start, end [, dct]) -> dict.

    Enzymes that do not cut the region in between start and end: the
    union of the enzymes without any site and of those that only cut
    outside the region.

    dct -- optional dict of enzyme -> sites to filter.  When omitted
           (None) self.mapping is used.  Both halves of the result are
           drawn from this same dict.
    """
    if dct is None:
        dct = self.mapping
    if not dct:
        # Nothing to screen; also avoids handing an empty dict to helpers
        # that may treat it as "not given".
        return {}
    # Pass dct on: calling without_site() bare would always screen the
    # full mapping and leak enzymes that are not in dct.
    d = self.without_site(dct)
    d.update(self.only_outside(start, end, dct))
    return d
#
# The restriction enzyme classes are created dynamically when the module is
# imported. Here is the magic which allows the creation of the
# restriction-enzyme classes.
#
# Restriction_Dictionary holds two dictionaries: one for the types (called
# pseudo-types here, as they really correspond to the values that instances
# of RestrictionType can take) and one for the enzymes. The split is for
# efficiency, as the bases are evaluated once per pseudo-type.
#
# However, Restriction is still a very inefficient module at import. But
# remember that around 660 classes (which is more or less the size of Rebase)
# have to be created dynamically. However, this processing takes place only
# once.
# This inefficiency is however largely compensated by the use of a metaclass,
# which provides a very efficient layout for the classes themselves, mostly
# alleviating the need for if/else branches in the class methods.
#
# It is essential to run Restriction with doc string optimisation (-OO switch)
# as the doc strings of 660 classes take a lot of processing.
#
CommOnly = RestrictionBatch()    # commercial enzymes
NonComm = RestrictionBatch()     # not available commercially
for TYPE, (bases, enzymes) in typedict.items():
    #
    # The keys are the pseudo-types TYPE (stored as type1, type2...).
    # The names are not important and are only present to differentiate
    # the keys in the dict. All the pseudo-types are in fact RestrictionType.
    # These names will not be used afterwards and the pseudo-types are not
    # kept in the locals() dictionary. It is therefore impossible to
    # import them.
    # Now, if you have a look at the dictionary, you will see that not all
    # the types are present, as those without corresponding enzymes have been
    # removed by Dictionary_Builder().
    #
    # The values are tuples which contain
    # as first element a tuple of bases (as strings) and
    # as second element the names of the enzymes.
    #
    # First eval the bases: each string is resolved to an object by eval().
    # NOTE(review): eval() runs on strings taken from typedict; presumably
    # this is generated data shipped with the package -- confirm it can
    # never come from user input.
    #
    bases = tuple(eval(x) for x in bases)
    #
    # Now create the particular value of RestrictionType for the classes
    # in enzymes.
    #
    T = type.__new__(RestrictionType, 'RestrictionType', bases, {})
    for k in enzymes:
        #
        # Now, we go through all the enzymes and assign them their type.
        # enzymedict[k] contains the values of the attributes for this
        # particular class (self.site, self.ovhg,....).
        #
        newenz = T(k, bases, enzymedict[k])
        #
        # We add the enzymes to the corresponding batch.
        #
        # No need to verify the enzyme is a RestrictionType -> add_nocheck
        #
        if newenz.is_comm():
            CommOnly.add_nocheck(newenz)
        else:
            NonComm.add_nocheck(newenz)
#
# AllEnzymes is a RestrictionBatch with all the enzymes from Rebase.
#
AllEnzymes = CommOnly | NonComm
#
# Now, place the enzymes in locals so they can be imported.
#
names = [str(x) for x in AllEnzymes]
try:
    del x
except NameError:
    # Scoping changed in Python 3, the variable isn't leaked
    pass
locals().update(dict(zip(names, AllEnzymes)))
# The public API is the fixed names below plus one name per enzyme.
__all__ = ['FormattedSeq', 'Analysis', 'RestrictionBatch', 'AllEnzymes', 'CommOnly', 'NonComm'] + names
# Remove the loop helpers so they do not linger in the module namespace.
del k, enzymes, TYPE, bases, names